{
 "cells": [
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "# 2、深层神经网络（DNN）分类模型",
   "id": "1a6c85778bf61dd1"
  },
  {
   "cell_type": "code",
   "id": "initial_id",
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2025-03-07T13:21:00.582770Z",
     "start_time": "2025-03-07T13:21:00.580156Z"
    }
   },
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import sklearn\n",
    "from tqdm.auto import tqdm\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import os\n",
    "import sys\n",
    "import matplotlib as mpl\n",
    "import matplotlib.pyplot as plt\n",
    "from torchvision import datasets\n",
    "from torchvision import transforms  # 将图片转换为tensor\n",
    "import torch.nn.functional as F"
   ],
   "outputs": [],
   "execution_count": 80
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "# 打印系统信息",
   "id": "1f9d7f5ef9f5b40e"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-07T13:21:00.612127Z",
     "start_time": "2025-03-07T13:21:00.609775Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Report the interpreter and library versions used for this run.\n",
    "print(sys.version_info)\n",
    "for module in (mpl, np, pd, sklearn, torch):\n",
    "    print(module.__name__, module.__version__)\n",
    "\n",
    "# Use the first CUDA GPU when one is available, otherwise the CPU.\n",
    "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
    "print(device)"
   ],
   "id": "cb10e18f5766dc8d",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sys.version_info(major=3, minor=12, micro=3, releaselevel='final', serial=0)\n",
      "matplotlib 3.9.1\n",
      "numpy 2.0.0\n",
      "pandas 2.2.2\n",
      "sklearn 1.6.1\n",
      "torch 2.6.0+cu126\n",
      "cuda:0\n"
     ]
    }
   ],
   "execution_count": 81
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "# 加载数据集以及数据预处理",
   "id": "c668a4ef75f01060"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-07T13:21:00.642853Z",
     "start_time": "2025-03-07T13:21:00.612127Z"
    }
   },
   "cell_type": "code",
   "source": [
    "\n",
    "# Convert each image to a tensor, then standardise its single grey channel.\n",
    "# NOTE(review): the commonly quoted FashionMNIST std is ~0.3530 - confirm 0.3205.\n",
    "transform = transforms.Compose([\n",
    "    transforms.ToTensor(),\n",
    "    transforms.Normalize(mean=(0.2860,), std=(0.3205,)),\n",
    "])\n",
    "\n",
    "# Download (if needed) the train and test splits into ./data.\n",
    "train_ds = datasets.FashionMNIST(root=\"data\", train=True, download=True, transform=transform)\n",
    "test_ds = datasets.FashionMNIST(root=\"data\", train=False, download=True, transform=transform)\n",
    "\n",
    "# torchvision ships no separate validation split, so the test split is used\n",
    "# for validation here; a manual split could be built with torch.utils.data.\n",
    "batch_size = 32\n",
    "train_loader = torch.utils.data.DataLoader(train_ds, batch_size=batch_size, shuffle=True)\n",
    "val_loader = torch.utils.data.DataLoader(test_ds, batch_size=batch_size, shuffle=False)"
   ],
   "id": "cdf81ebed569ebf4",
   "outputs": [],
   "execution_count": 82
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "# 定义模型",
   "id": "ac6d8c94ed111ca8"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-07T13:21:00.651854Z",
     "start_time": "2025-03-07T13:21:00.642853Z"
    }
   },
   "cell_type": "code",
   "source": [
    "class NeuralNetwork(nn.Module):\n",
    "    def __init__(self, layers_num=2):\n",
    "        super().__init__()\n",
    "        self.transforms = transforms  # 预处理层，标准化\n",
    "        self.flatten = nn.Flatten()\n",
    "        # 多加几层\n",
    "        self.linear_relu_stack = nn.Sequential(\n",
    "            nn.Linear(28 * 28, 100),\n",
    "            nn.ReLU(),\n",
    "        )\n",
    "        # 加19层:所谓的DNN就是多个全连接层堆叠而成add_module()方法可以添加多个模块\n",
    "        for i in range(1, layers_num):\n",
    "            self.linear_relu_stack.add_module(f\"Linear_{i}\", nn.Linear(100, 100))\n",
    "            self.linear_relu_stack.add_module(f\"relu\", nn.ReLU())\n",
    "        # 输出层\n",
    "        self.linear_relu_stack.add_module(\"Output Layer\", nn.Linear(100, 10))\n",
    "\n",
    "        # 初始化权重\n",
    "        self.init_weights()\n",
    "\n",
    "    def init_weights(self):\n",
    "        \"\"\"使用 xavier 均匀分布来初始化全连接层的权重 W\"\"\"\n",
    "        # print('''初始化权重''')\n",
    "        for m in self.modules():\n",
    "            if isinstance(m, nn.Linear):  #判断m是否为全连接层\n",
    "                nn.init.xavier_uniform_(m.weight)  # xavier 均匀分布初始化权重\n",
    "                nn.init.zeros_(m.bias)  # 全零初始化偏置项\n",
    "        # print('''初始化权重完成''')\n",
    "\n",
    "    def forward(self, x):\n",
    "        # x.shape [batch size, 1, 28, 28]\n",
    "        # x = self.transforms(x)  #标准化\n",
    "        x = self.flatten(x)\n",
    "        # 展平后 x.shape [batch size, 28 * 28]\n",
    "        logits = self.linear_relu_stack(x)\n",
    "        # logits.shape [batch size, 10]\n",
    "        return logits\n",
    "\n",
    "\n",
    "model = NeuralNetwork(layers_num=10)\n",
    "# Count the parameters of the model that is actually trained below, rather\n",
    "# than those of a separate throw-away network with a different depth.\n",
    "total = sum(param.numel() for param in model.parameters())\n",
    "total  # number of parameters in `model`\n"
   ],
   "id": "dd323365c40aa6a6",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "np.int64(271410)"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 83
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "# 自定义损失函数和优化器",
   "id": "20e3452a91e25d4a"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-07T13:21:00.655519Z",
     "start_time": "2025-03-07T13:21:00.652857Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# 1. 定义损失函数 采用交叉熵损失\n",
    "def mes_loss(array_1, array_2):  # mes: mean squared error\n",
    "    # return ((array_1 - array_2) ** 2).mean()\n",
    "    # 把array_1和array_2转化为batch_size*1的tensor\n",
    "    return nn.CrossEntropyLoss()\n",
    "\n",
    "# 2. Optimizer: SGD with momentum (other optimizers live in torch.optim).\n",
    "optimizer = torch.optim.SGD(\n",
    "    params=model.parameters(),\n",
    "    lr=0.001,\n",
    "    momentum=0.9,\n",
    ")"
   ],
   "id": "22f030d2dbb1c736",
   "outputs": [],
   "execution_count": 84
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "# 定义评估函数",
   "id": "934c2422f99f4639"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-07T13:21:00.661751Z",
     "start_time": "2025-03-07T13:21:00.655519Z"
    }
   },
   "cell_type": "code",
   "source": [
    "from sklearn.metrics import accuracy_score\n",
    "\n",
    "\n",
    "@torch.no_grad()\n",
    "def evaluating(model, dataloader, loss_func):\n",
    "    \"\"\"Compute the average loss and the accuracy of `model` over `dataloader`.\n",
    "\n",
    "    Gradients are disabled by the decorator. The train/eval mode of the model\n",
    "    is not changed here; callers switch it before and after the call.\n",
    "\n",
    "    Args:\n",
    "        model: Callable mapping a batch of inputs to logits.\n",
    "        dataloader: Iterable yielding (inputs, labels) batches.\n",
    "        loss_func: Callable (logits, labels) -> scalar loss tensor; assumed to\n",
    "            return the mean loss of the batch.\n",
    "\n",
    "    Returns:\n",
    "        (mean_loss, accuracy). mean_loss is nan when no sample was seen.\n",
    "    \"\"\"\n",
    "    total_loss = 0.0\n",
    "    sample_count = 0\n",
    "    pred_list = []\n",
    "    label_list = []\n",
    "    for datas, labels in dataloader:\n",
    "        datas = datas.to(device)\n",
    "        labels = labels.to(device)\n",
    "        # Forward pass\n",
    "        logits = model(datas)\n",
    "        loss = loss_func(logits, labels)\n",
    "        # Weight each batch by its size so a smaller final batch does not\n",
    "        # count as much as a full one in the average.\n",
    "        n_in_batch = labels.shape[0]\n",
    "        total_loss += loss.item() * n_in_batch\n",
    "        sample_count += n_in_batch\n",
    "\n",
    "        preds = logits.argmax(axis=-1)\n",
    "        pred_list.extend(preds.tolist())\n",
    "        label_list.extend(labels.tolist())\n",
    "\n",
    "    acc = accuracy_score(label_list, pred_list)\n",
    "    mean_loss = total_loss / sample_count if sample_count else float(\"nan\")\n",
    "    return mean_loss, acc\n"
   ],
   "id": "2c91772c89ffe099",
   "outputs": [],
   "execution_count": 85
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "# 训练函数",
   "id": "f4a4d0957eadc3e1"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-07T13:21:00.668197Z",
     "start_time": "2025-03-07T13:21:00.661751Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Training loop\n",
    "def training(\n",
    "        model,\n",
    "        train_loader,\n",
    "        val_loader,\n",
    "        epoch,\n",
    "        loss_func,\n",
    "        optimizer,\n",
    "        tensorboard_callback=None,\n",
    "        save_ckpt_callback=None,\n",
    "        early_stop_callback=None,\n",
    "        eval_step=500,\n",
    "):\n",
    "    \"\"\"Train `model` for `epoch` epochs, validating every `eval_step` steps.\n",
    "\n",
    "    Args:\n",
    "        model: Network to optimise.\n",
    "        train_loader: Iterable of (inputs, labels) training batches.\n",
    "        val_loader: Iterable of (inputs, labels) batches passed to evaluating().\n",
    "        epoch: Number of passes over train_loader.\n",
    "        loss_func: Callable (logits, labels) -> scalar loss tensor.\n",
    "        optimizer: Optimizer updating the model parameters.\n",
    "        tensorboard_callback: Optional callable invoked after each validation\n",
    "            with the step and loss / val_loss / acc / val_acc / lr keywords.\n",
    "        save_ckpt_callback: Optional callable receiving the step, the model\n",
    "            state dict and metric=val_acc.\n",
    "        early_stop_callback: Optional callable fed val_acc; training returns\n",
    "            early once its `early_stop` attribute is truthy.\n",
    "        eval_step: Validation runs whenever the global step is a multiple of it.\n",
    "\n",
    "    Returns:\n",
    "        Dict with \"train\" and \"val\" lists of {\"loss\", \"acc\", \"step\"} records.\n",
    "    \"\"\"\n",
    "    train_records = []\n",
    "    val_records = []\n",
    "    history = {\"train\": train_records, \"val\": val_records}\n",
    "\n",
    "    step = 0\n",
    "    model.train()\n",
    "    total_steps = epoch * len(train_loader)\n",
    "    with tqdm(total=total_steps) as progress:\n",
    "        for epoch_idx in range(epoch):\n",
    "            for inputs, targets in train_loader:\n",
    "                inputs, targets = inputs.to(device), targets.to(device)\n",
    "\n",
    "                # One optimisation step: reset grads, forward, loss, backward, update.\n",
    "                optimizer.zero_grad()\n",
    "                outputs = model(inputs)\n",
    "                batch_loss = loss_func(outputs, targets)\n",
    "                batch_loss.backward()\n",
    "                optimizer.step()\n",
    "\n",
    "                # Per-batch training metrics.\n",
    "                predicted = outputs.argmax(axis=-1)\n",
    "                train_acc = accuracy_score(targets.cpu().numpy(), predicted.cpu().numpy())\n",
    "                train_loss = batch_loss.cpu().item()\n",
    "                train_records.append({\"loss\": train_loss, \"acc\": train_acc, \"step\": step})\n",
    "\n",
    "                if step % eval_step == 0:\n",
    "                    # Validate in eval mode, then switch back to train mode.\n",
    "                    model.eval()\n",
    "                    val_loss, val_acc = evaluating(model, val_loader, loss_func)\n",
    "                    val_records.append({\"loss\": val_loss, \"acc\": val_acc, \"step\": step})\n",
    "                    model.train()\n",
    "\n",
    "                    # 1. TensorBoard logging\n",
    "                    if tensorboard_callback is not None:\n",
    "                        tensorboard_callback(\n",
    "                            step,\n",
    "                            loss=train_loss, val_loss=val_loss,\n",
    "                            acc=train_acc, val_acc=val_acc,\n",
    "                            lr=optimizer.param_groups[0][\"lr\"],\n",
    "                        )\n",
    "\n",
    "                    # 2. Checkpoint saving\n",
    "                    if save_ckpt_callback is not None:\n",
    "                        save_ckpt_callback(step, model.state_dict(), metric=val_acc)\n",
    "\n",
    "                    # 3. Early stopping\n",
    "                    if early_stop_callback is not None:\n",
    "                        early_stop_callback(val_acc)\n",
    "                        if early_stop_callback.early_stop:\n",
    "                            print(f\"Early stop at epoch {epoch_idx} / global_step {step}\")\n",
    "                            return history\n",
    "\n",
    "                # Advance the step counter and the progress bar.\n",
    "                step += 1\n",
    "                progress.update(1)\n",
    "                progress.set_postfix({\"epoch\": epoch_idx})\n",
    "\n",
    "    return history\n"
   ],
   "id": "e1a5883fb896c195",
   "outputs": [],
   "execution_count": 86
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-07T13:21:00.711201Z",
     "start_time": "2025-03-07T13:21:00.668197Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Train for 10 epochs, validating once per epoch (every len(train_loader) steps).\n",
    "epoch = 10\n",
    "loss_func = mes_loss\n",
    "model = model.to(device)\n",
    "record = training(\n",
    "    model=model,\n",
    "    train_loader=train_loader,\n",
    "    val_loader=val_loader,\n",
    "    epoch=epoch,\n",
    "    loss_func=loss_func,\n",
    "    optimizer=optimizer,\n",
    "    eval_step=len(train_loader),\n",
    ")"
   ],
   "id": "2ef7b075af3b4fe2",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "  0%|          | 0/18750 [00:00<?, ?it/s]"
      ],
      "application/vnd.jupyter.widget-view+json": {
       "version_major": 2,
       "version_minor": 0,
       "model_id": "6fe2541c25a1481f914a17d0350045f9"
      }
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "ename": "AttributeError",
     "evalue": "'CrossEntropyLoss' object has no attribute 'item'",
     "output_type": "error",
     "traceback": [
      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[1;31mAttributeError\u001B[0m                            Traceback (most recent call last)",
      "Cell \u001B[1;32mIn[87], line 4\u001B[0m\n\u001B[0;32m      2\u001B[0m loss_func \u001B[38;5;241m=\u001B[39m mes_loss\n\u001B[0;32m      3\u001B[0m model \u001B[38;5;241m=\u001B[39m model\u001B[38;5;241m.\u001B[39mto(device)\n\u001B[1;32m----> 4\u001B[0m record \u001B[38;5;241m=\u001B[39m \u001B[43mtraining\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m      5\u001B[0m \u001B[43m    \u001B[49m\u001B[43mmodel\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m      6\u001B[0m \u001B[43m    \u001B[49m\u001B[43mtrain_loader\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m      7\u001B[0m \u001B[43m    \u001B[49m\u001B[43mval_loader\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m      8\u001B[0m \u001B[43m    \u001B[49m\u001B[43mepoch\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m      9\u001B[0m \u001B[43m    \u001B[49m\u001B[43mloss_func\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m     10\u001B[0m \u001B[43m    \u001B[49m\u001B[43moptimizer\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m     11\u001B[0m \u001B[43m    \u001B[49m\u001B[43meval_step\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mlen\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43mtrain_loader\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m     12\u001B[0m \u001B[43m)\u001B[49m\n",
      "Cell \u001B[1;32mIn[86], line 40\u001B[0m, in \u001B[0;36mtraining\u001B[1;34m(model, train_loader, val_loader, epoch, loss_func, optimizer, tensorboard_callback, save_ckpt_callback, early_stop_callback, eval_step)\u001B[0m\n\u001B[0;32m     37\u001B[0m preds \u001B[38;5;241m=\u001B[39m logits\u001B[38;5;241m.\u001B[39margmax(axis\u001B[38;5;241m=\u001B[39m\u001B[38;5;241m-\u001B[39m\u001B[38;5;241m1\u001B[39m)\n\u001B[0;32m     39\u001B[0m acc \u001B[38;5;241m=\u001B[39m accuracy_score(labels\u001B[38;5;241m.\u001B[39mcpu()\u001B[38;5;241m.\u001B[39mnumpy(), preds\u001B[38;5;241m.\u001B[39mcpu()\u001B[38;5;241m.\u001B[39mnumpy())\n\u001B[1;32m---> 40\u001B[0m loss \u001B[38;5;241m=\u001B[39m \u001B[43mloss\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mcpu\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mitem\u001B[49m()\n\u001B[0;32m     41\u001B[0m \u001B[38;5;66;03m# record\u001B[39;00m\n\u001B[0;32m     43\u001B[0m record_dict[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mtrain\u001B[39m\u001B[38;5;124m\"\u001B[39m]\u001B[38;5;241m.\u001B[39mappend({\n\u001B[0;32m     44\u001B[0m     \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mloss\u001B[39m\u001B[38;5;124m\"\u001B[39m: loss, \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124macc\u001B[39m\u001B[38;5;124m\"\u001B[39m: acc, \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mstep\u001B[39m\u001B[38;5;124m\"\u001B[39m: global_step\n\u001B[0;32m     45\u001B[0m })\n",
      "File \u001B[1;32m~\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1928\u001B[0m, in \u001B[0;36mModule.__getattr__\u001B[1;34m(self, name)\u001B[0m\n\u001B[0;32m   1926\u001B[0m     \u001B[38;5;28;01mif\u001B[39;00m name \u001B[38;5;129;01min\u001B[39;00m modules:\n\u001B[0;32m   1927\u001B[0m         \u001B[38;5;28;01mreturn\u001B[39;00m modules[name]\n\u001B[1;32m-> 1928\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mAttributeError\u001B[39;00m(\n\u001B[0;32m   1929\u001B[0m     \u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;28mtype\u001B[39m(\u001B[38;5;28mself\u001B[39m)\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124m object has no attribute \u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;132;01m{\u001B[39;00mname\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m   1930\u001B[0m )\n",
      "\u001B[1;31mAttributeError\u001B[0m: 'CrossEntropyLoss' object has no attribute 'item'"
     ]
    }
   ],
   "execution_count": 87
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# Note when reading the plots: the loss is not necessarily within [0, 1].\n",
    "def plot_learning_curves(record_dict, sample_step=500):\n",
    "    \"\"\"Plot train and validation curves, one subplot per recorded metric.\n",
    "\n",
    "    Args:\n",
    "        record_dict: Dict with \"train\" and \"val\" lists of records; each record\n",
    "            has a \"step\" key plus one value per metric (as built by training()).\n",
    "        sample_step: Keep every sample_step-th training record to thin the curve.\n",
    "    \"\"\"\n",
    "    train_df = pd.DataFrame(record_dict[\"train\"]).set_index(\"step\").iloc[::sample_step]\n",
    "    val_df = pd.DataFrame(record_dict[\"val\"]).set_index(\"step\")\n",
    "\n",
    "    fig_num = len(train_df.columns)  # one subplot per metric (loss, acc, ...)\n",
    "    # squeeze=False always returns a 2-D axes array, so indexing also works\n",
    "    # when only a single metric was recorded.\n",
    "    fig, axs = plt.subplots(1, fig_num, figsize=(5 * fig_num, 5), squeeze=False)\n",
    "\n",
    "    # One tick every 5000 steps, labelled in thousands. Lists are used because\n",
    "    # set_xticklabels expects a sequence, not a one-shot iterator.\n",
    "    x_data = list(range(0, train_df.index[-1], 5000))\n",
    "    x_labels = [f\"{int(x / 1000)}k\" for x in x_data]\n",
    "\n",
    "    for ax, item in zip(axs[0], train_df.columns):\n",
    "        ax.plot(train_df.index, train_df[item], label=f\"train_{item}\")\n",
    "        ax.plot(val_df.index, val_df[item], label=f\"val_{item}\")\n",
    "        ax.grid()\n",
    "        ax.legend()\n",
    "        ax.set_xticks(x_data)\n",
    "        ax.set_xticklabels(x_labels)\n",
    "        ax.set_xlabel(\"step\")\n",
    "\n",
    "    plt.show()\n",
    "\n",
    "\n",
    "plot_learning_curves(record, sample_step=500)  # x-axis is the training step"
   ],
   "id": "609d7d8c6d1471a6",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-07T13:21:00.712204Z",
     "start_time": "2025-03-07T13:21:00.712204Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Final evaluation on the validation loader, with the model in eval mode.\n",
    "model.eval()\n",
    "loss, acc = evaluating(model=model, dataloader=val_loader, loss_func=loss_func)\n",
    "print(f\"loss:{loss:.4f}\\naccuracy:{acc:.4f}\")"
   ],
   "id": "a132c5e875313c06",
   "outputs": [],
   "execution_count": null
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
